{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# A 5-minute tour of SynapseML"
   ],
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from synapse.ml.core.platform import running_on_synapse\n",
    "\n",
    "# Attach to the Spark session that is already running, or create one.\n",
    "spark = SparkSession.builder.getOrCreate()\n",
    "\n",
    "# On Synapse, bring the notebook `display` helper into scope for the cells below.\n",
    "if running_on_synapse():\n",
    "    from notebookutils.visualization import display"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "source_hidden": false,
     "outputs_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Step 1: Load our Dataset"
   ],
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Read a 1,000-row slice of the public Amazon book-review data and cache it,\n",
    "# since both splits below are derived from it.\n",
    "reviews = (\n",
    "    spark.read.parquet(\n",
    "        \"wasbs://publicwasb@mmlspark.blob.core.windows.net/BookReviewsFromAmazon10K.parquet\"\n",
    "    )\n",
    "    .limit(1000)\n",
    "    .cache()\n",
    ")\n",
    "\n",
    "# Seed the 80/20 split so the train/test assignment is repeatable across runs.\n",
    "train, test = reviews.randomSplit([0.8, 0.2], seed=42)\n",
    "\n",
    "display(train)"
   ],
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "application/vnd.livy.statement-meta+json": {
       "spark_pool": null,
       "session_id": null,
       "statement_id": null,
       "state": "cancelled",
       "livy_statement_state": null,
       "queued_time": "2022-09-02T20:18:04.1274612Z",
       "session_start_time": null,
       "execution_start_time": null,
       "execution_finish_time": "2022-09-02T20:18:21.4549018Z",
       "spark_jobs": null
      },
      "text/plain": "StatementMeta(, , , Cancelled, )"
     },
     "metadata": {}
    }
   ],
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "title": "",
     "showTitle": false,
     "inputWidgets": {},
     "nuid": "396e4834-0140-418b-8867-4a0e20c547d6"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Step 2: Make our Model"
   ],
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "title": "",
     "showTitle": false,
     "inputWidgets": {},
     "nuid": "51b3c66a-3582-429a-a969-c2fb66e77c49"
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "from pyspark.ml import Pipeline\n",
    "from synapse.ml.featurize.text import TextFeaturizer\n",
    "from synapse.ml.lightgbm import LightGBMRegressor\n",
    "\n",
    "# Stage 1: featurize the `text` column into a `features` column.\n",
    "featurizer = TextFeaturizer(inputCol=\"text\", outputCol=\"features\")\n",
    "\n",
    "# Stage 2: regress the `rating` label on the `features` column.\n",
    "regressor = LightGBMRegressor(featuresCol=\"features\", labelCol=\"rating\")\n",
    "\n",
    "# Chain both stages and fit them on the training split.\n",
    "model = Pipeline(stages=[featurizer, regressor]).fit(train)"
   ],
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "application/vnd.livy.statement-meta+json": {
       "spark_pool": null,
       "session_id": null,
       "statement_id": null,
       "state": "cancelled",
       "livy_statement_state": null,
       "queued_time": "2022-09-02T20:18:04.2792548Z",
       "session_start_time": null,
       "execution_start_time": null,
       "execution_finish_time": "2022-09-02T20:18:21.4560511Z",
       "spark_jobs": null
      },
      "text/plain": "StatementMeta(, , , Cancelled, )"
     },
     "metadata": {}
    }
   ],
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "title": "",
     "showTitle": false,
     "inputWidgets": {},
     "nuid": "df76cfec-a945-469c-a9df-47e9144e37eb"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Step 3: Predict!"
   ],
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "title": "",
     "showTitle": false,
     "inputWidgets": {},
     "nuid": "e0d23e7d-14c7-4a38-9983-fac290e97def"
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Apply the fitted pipeline to the held-out split and show the result.\n",
    "predictions = model.transform(test)\n",
    "display(predictions)"
   ],
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "application/vnd.livy.statement-meta+json": {
       "spark_pool": null,
       "session_id": null,
       "statement_id": null,
       "state": "cancelled",
       "livy_statement_state": null,
       "queued_time": "2022-09-02T20:18:04.3829014Z",
       "session_start_time": null,
       "execution_start_time": null,
       "execution_finish_time": "2022-09-02T20:18:21.4570637Z",
       "spark_jobs": null
      },
      "text/plain": "StatementMeta(, , , Cancelled, )"
     },
     "metadata": {}
    }
   ],
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "title": "",
     "showTitle": false,
     "inputWidgets": {},
     "nuid": "70845164-e6df-4948-aa68-d8f4c5537eaa"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Alternate route: Let Cognitive Services handle it"
   ],
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "title": "",
     "showTitle": false,
     "inputWidgets": {},
     "nuid": "cbc5ddec-2984-4fea-b164-d1127f46f919"
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "from synapse.ml.cognitive import TextSentiment\n",
    "from synapse.ml.core.platform import find_secret\n",
    "\n",
    "# Use a distinct name so the fitted pipeline held in `model` is not overwritten;\n",
    "# re-running the prediction cell afterwards still uses the trained pipeline.\n",
    "sentiment_model = TextSentiment(\n",
    "    textCol=\"text\",\n",
    "    outputCol=\"sentiment\",\n",
    "    # The key is looked up by secret name rather than hard-coded in the notebook.\n",
    "    subscriptionKey=find_secret(\"cognitive-api-key\"),\n",
    ").setLocation(\"eastus\")\n",
    "\n",
    "display(sentiment_model.transform(test))"
   ],
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "application/vnd.livy.statement-meta+json": {
       "spark_pool": null,
       "session_id": null,
       "statement_id": null,
       "state": "cancelled",
       "livy_statement_state": null,
       "queued_time": "2022-09-02T20:18:04.4950824Z",
       "session_start_time": null,
       "execution_start_time": null,
       "execution_finish_time": "2022-09-02T20:18:21.4580363Z",
       "spark_jobs": null
      },
      "text/plain": "StatementMeta(, , , Cancelled, )"
     },
     "metadata": {}
    }
   ],
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "title": "",
     "showTitle": false,
     "inputWidgets": {},
     "nuid": "f22b1c1a-b0c9-43e9-bcb0-b29624b4a76a"
    },
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "name": "synapse_pyspark",
   "display_name": "Synapse PySpark"
  },
  "language_info": {
   "name": "python"
  },
  "description": null,
  "save_output": true,
  "synapse_widget": {
   "version": "0.1",
   "state": {}
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}